我們計畫透過Youbike的使用資料,來研究這樣的喝咖啡政策是不是真的有顯著效果,以及分析台北市各個區域間單車騎乘率,研究咖啡站的選址是否合乎效率。
政策背景: 北市府為提倡節能減碳,訂定每周五為綠色運輸日,只要周五騎自行車通勤上班就請你喝咖啡,每周五上午7時30分至9時30分擺設攤位由原6個增加為12個,供騎單車市民領取咖啡點心,共同響應周五綠色運輸日,我們希望檢視此活動是否真的能使綠色運輸日當天使用自行車通勤數量增加。
Note:這份notebook主要先透過探索政府即時公開資料,試圖觀察Youbike使用變化,所以尚未涉及回歸模型。
library(jsonlite)
library(httr)
library(gplots)
require(ggplot2)
library(readr)
library(plm)
library(stringr)
library(readr)
library(data.table)
# ---- Load the archived YouBike snapshots and derive analysis columns ----
# RAW keeps the CSV exactly as read; ubike.df is the working copy.
RAW <- read_csv("~/Google Drive/Course/Working_on/內生性報告/ubike/ubikedf2.csv")
ubike.df <- RAW
ubike.df$X1 <- NULL

# Column names are borrowed from the field names of station record "0001"
# in the live feed, followed by a trailing "Time" column.
# NOTE(review): this requires network access and assumes the live feed still
# has the same fields, in the same order, as the archived CSV -- confirm.
url <- "http://data.taipei/youbike"
ubike.list <- fromJSON(content(GET(url), "text"))
cname <- names(ubike.list$retVal$`0001`)
names(ubike.df) <- c(cname, "Time")

# Drop rows without an sbi value, then compute sat = sbi / tot.
ubike.df <- ubike.df[!is.na(ubike.df$sbi), ]
ubike.df$sat <- ubike.df$sbi / ubike.df$tot
ubike.df$sno <- as.factor(ubike.df$sno)

# Time-derived columns. The order in which columns are created is kept
# (sat, hour, date, min, dow, ...) because later code addresses a column by
# its position.
ubike.df$Time <- as.POSIXct(ubike.df$Time)
ubike.df$hour <- as.numeric(format(ubike.df$Time, "%H"))
ubike.df$date <- as.numeric(format(ubike.df$Time, "%d"))
ubike.df$min <- format(ubike.df$Time, "%M")
# Switch the time locale before calling weekdays() so that day names are
# produced under the "en_US" locale.
Sys.setlocale("LC_TIME", "en_US")
ubike.df$dow <- weekdays(ubike.df$Time)

# "lat,lng" as a single string.
ubike.df$position <- paste(ubike.df$lat, ubike.df$lng, sep = ",")

# coffee = 1 for station numbers listed in coffeStand, 0 otherwise.
coffeStand <- sort(c(173, 175, 8, 45, 87, 22, 115, 57, 134, 238, 172, 187))
ubike.df$coffee <- as.numeric(ubike.df$sno %in% coffeStand)

# Map each district (sareaen) to a station code; any district that is not
# listed (or is missing) falls back to "tpe".
station_of_district <- c(
  "Xinyi Dist." = "xyi",
  "Nangang Dist." = "nag",
  "Zhongzheng Dist." = "tpe",
  "Wanhua Dist." = "tpe",
  "Wenshan Dist." = "wen",
  "Daan Dist." = "xyi",
  "Shilin Dist." = "tia",
  "Beitou Dist." = "tia",
  "Songshan Dist." = "ssh"
)
station_code <- unname(station_of_district[as.character(ubike.df$sareaen)])
station_code[is.na(station_code)] <- "tpe"
ubike.df$station <- station_code

ubike.df
# ---- Exploratory plots of sat ----
# Group means of sat by district and by station number, before any stations
# are removed.
plotmeans(sat ~ sareaen, data = ubike.df, main = "Heterogeineity across area")
plotmeans(sat ~ sno, data = ubike.df, main = "Heterogeineity across sno")

# Remove stations 6, 82, 139 and 252 from the working data, then redraw the
# group-mean plots (by station, by district, by day of week).
excluded_sno <- c(6, 82, 139, 252)
ubike.df <- ubike.df[!(ubike.df$sno %in% excluded_sno), ]
plotmeans(sat ~ sno, data = ubike.df, main = "Heterogeineity across no.")
plotmeans(sat ~ sareaen, data = ubike.df, main = "Heterogeineity across area")
plotmeans(sat ~ dow, data = ubike.df, main = "Heterogeineity across day of week")

# sat over time for station 1 and then station 22; sta1 is left holding the
# station 22 subset afterwards.
for (sno_id in c(1, 22)) {
  sta1 <- ubike.df[ubike.df$sno == sno_id, ]
  plot(sta1$Time, sta1$sat)
}
# ---- Read every weather CSV of every station into one data frame ----
# Each entry of `station` is the name of a sub-directory under weather_root.
station <- c(
  "taipei", "tianmu", "neihu", "xinyi", "songshan",
  "gongguan", "dazhi", "wenshan", "nangang"
)
weather_root <- "/Users/maxchen/Google Drive/Course/Working_on/內生性報告/weather/"

# Files are located with full paths instead of setwd(), so the session's
# working directory is left untouched. Results are stacked once with
# do.call(rbind, ...) rather than grown inside a loop.
weather <- do.call(rbind, lapply(station, function(stn) {
  stn_dir <- paste0(weather_root, stn)
  # list.files() silently returns nothing for a missing directory, so fail
  # loudly here (setwd() used to raise the error in this situation).
  if (!dir.exists(stn_dir)) {
    stop("Weather directory not found: ", stn_dir, call. = FALSE)
  }
  # `pattern` is a regular expression, not a glob: match names ending in .csv.
  csv_paths <- list.files(stn_dir, pattern = "\\.csv$", full.names = TRUE)
  do.call(rbind, lapply(csv_paths, function(path) {
    read.csv(path, fileEncoding = "UTF-8", stringsAsFactors = FALSE)
  }))
}))
# ---- Tidy the weather table and join it to the YouBike observations ----
# Rename the weather columns positionally.
# NOTE(review): this assumes the CSV columns arrive in exactly this order --
# confirm against the weather files.
colnames(weather) <- c(
  "Obstime", "StnPre", "SeaPre", "temp_hour", "Tddewpoint", "humidity",
  "wind_speed", "wind_degree", "WSGust", "WDGust", "precp", "precpHour",
  "sunshine", "Globrad", "visb", "year", "month", "day", "hour", "Time",
  "station"
)
# Drop a column named "X" if one is present after the rename.
weather$X <- NULL

# Coerce the measurements used later to numeric; values that cannot be
# parsed become NA (with a warning).
for (col in c("temp_hour", "humidity", "wind_speed", "precp")) {
  weather[[col]] <- as.numeric(weather[[col]])
}

# Rename the day-of-month column by NAME so it matches the weather key.
# A positional index would silently hit a different column if the number of
# fields in ubike.df ever changed.
names(ubike.df)[names(ubike.df) == "date"] <- "day"

# Left join: every YouBike row is kept; rows without a weather match get NA
# in the weather columns.
total <- merge(ubike.df, weather, by = c("day", "hour", "station"), all.x = TRUE)

# rain = 1 when precp is strictly positive, 0 otherwise (including NA).
total$rain <- as.numeric(!is.na(total$precp) & total$precp > 0)

# Both inputs carry a "Time" column, so merge() suffixes them; give the
# YouBike timestamp its plain name back (the weather one stays "Time.y").
names(total)[names(total) == "Time.x"] <- "Time"
total
# ---- Absolute change in sbi between consecutive observations, station 1 ----
# (A station-22 subset was previously assigned here and then overwritten on
# the very next line; that dead assignment has been removed.)
sta1 <- total[total$sno == 1, ]
sta1 <- sta1[order(sta1$Time, decreasing = FALSE), ]

# diff[1] is 0; diff[i] is |sbi[i] - sbi[i - 1]| for i >= 2. Vectorised with
# base diff(), and safe when the subset has fewer than two rows (an index
# loop over 2:nrow() would count downwards in that case).
sta1$diff <- if (nrow(sta1) > 1) {
  c(0, abs(diff(sta1$sbi)))
} else {
  rep(0, nrow(sta1))
}